##########################################################################################################################################################
### Setup

### Set up environment

# Clear environment
# Removes every object that ls() lists in the environment the script is run in,
# so the rest of the script starts from objects it creates itself.
# NOTE(review): this also deletes the caller's objects when the script is
# source()d into an existing session; attached packages are not affected.
rm(list=ls())

# Load libraries
library(tidyverse)
library(magrittr)
library(abmisc)
library(data.table)
library(lme4)
library(cowplot)
library(stargazer)
library(corrplot)
library(patchwork)
library(sjPlot)
library(readstata13)
library(vtable)
library(gridExtra)
library(ggpubr)
library(ggstance)
library(xtable)
library(Matching)


### Load and format votes data

# Read the votes modeling data as a plain data.frame and keep only 2010-14
votes <- fread('data/votes_modeling_data.csv', data.table = FALSE, stringsAsFactors = FALSE) %>%
  filter(year %in% 2010:2014)

# Build the factor versions used for plotting and invert seat_ratio so it reads
# upper:lower rather than lower:upper.
# The order inside mutate() matters: divided3 reads the already recoded
# 'Yes'/'No' divided, and lineitemveto_plot reads the factor lineitemveto.
# Each relevel() pins the reference (first) level of the new factor.
votes <- votes %>%
  mutate(
    lineitemveto = as.factor(lineitemveto),
    lineitemveto_appropsonly = as.factor(lineitemveto_appropsonly),
    maj_sponsor = relevel(as.factor(ifelse(maj_sponsor == 1, 'Majority', 'Minority')), ref = 'Minority'),
    maj_pass_override = relevel(as.factor(ifelse(maj_pass_override == 1, 'Yes', 'No')), ref = 'No'),
    divided = relevel(as.factor(ifelse(divided == 1, 'Yes', 'No')), ref = 'No'),
    divided3 = relevel(as.factor(ifelse(divided == 'Yes', 'Divided', 'Unified')), ref = 'Unified'),
    lineitemveto_plot = relevel(as.factor(ifelse(lineitemveto == 1, 'Yes', 'No')), ref = 'No'),
    trifecta_plot = relevel(as.factor(ifelse(trifecta == 1, 'Full Control', 'Not Full Control')), ref = 'Not Full Control'),
    term_diff2 = relevel(as.factor(ifelse(term_diff == 2, 'Yes', 'No')), ref = 'No'),
    seat_ratio = 1 / seat_ratio
  ) %>%
  # The bicam_representation column is dropped
  dplyr::select(-bicam_representation)

# States to remove because of high missingness
high_missingness <- c(
  'AZ', 'DE', 'FL', 'IL', 'IN', 'KY', 'MI', 'MO',
  'MT', 'ND', 'OK', 'OR', 'RI', 'TX', 'WA'
)


### Load and format bills data

# Read the combined bill-level modeling data as a plain data.frame and keep only 2010-14
bill <- fread('data/combined_modeling_data.csv', data.table = FALSE, stringsAsFactors = FALSE) %>%
  filter(year %in% 2010:2014)

# Build the factor versions used for plotting, invert seat_ratio so it reads
# upper:lower rather than lower:upper, and replace maj_diff by its absolute value.
# The order inside mutate() matters: divided3 reads the already recoded
# 'Yes'/'No' divided, and lineitemveto_plot reads the factor lineitemveto.
# Each relevel() pins the reference (first) level of the new factor.
bill <- bill %>%
  mutate(
    lineitemveto = as.factor(lineitemveto),
    lineitemveto_appropsonly = as.factor(lineitemveto_appropsonly),
    maj_sponsor = relevel(as.factor(ifelse(maj_sponsor == 1, 'Majority', 'Minority')), ref = 'Minority'),
    maj_pass_override = relevel(as.factor(ifelse(maj_pass_override == 1, 'Yes', 'No')), ref = 'No'),
    divided = relevel(as.factor(ifelse(divided == 1, 'Yes', 'No')), ref = 'No'),
    divided3 = relevel(as.factor(ifelse(divided == 'Yes', 'Divided', 'Unified')), ref = 'Unified'),
    lineitemveto_plot = relevel(as.factor(ifelse(lineitemveto == 1, 'Yes', 'No')), ref = 'No'),
    trifecta_plot = relevel(as.factor(ifelse(trifecta == 1, 'Full Control', 'Not Full Control')), ref = 'Not Full Control'),
    term_diff2 = relevel(as.factor(ifelse(term_diff == 2, 'Yes', 'No')), ref = 'No'),
    seat_ratio = 1 / seat_ratio,
    maj_diff = abs(maj_diff)
  ) %>%
  # The bicam_representation column is dropped
  dplyr::select(-bicam_representation)


### Load, format, and join bipartisan representation variable from Makse data

# Read the state-level file
bicam <- read_csv("data/BDASL_States.csv")

# Drop the row whose state is 'US' (the country observation)
bicam <- bicam %>% filter(state != 'US')

# Build a long state-year table of the representation measure:
#   1. keep state plus the columns that start with 'br' and contain a year in 2009-2020
#   2. strip the 'br' prefix so the column names are the bare years
#   3. copy each odd year 2009-2017 onto the following even year
#   4. reshape to one row per state-year, with the value stored in bipart_rep
#   5. convert year to numeric so it can be joined against bill/votes
br <- bicam %>%
  dplyr::select(c(state, matches('^br') & matches(char(2009:2020)))) %>%
  rename_with(function(nm) str_replace(nm, 'br', '')) %>%
  mutate(
    `2010` = `2009`,
    `2012` = `2011`,
    `2014` = `2013`,
    `2016` = `2015`,
    `2018` = `2017`
  ) %>%
  pivot_longer(cols = -state, names_to = 'year', values_to = 'bipart_rep') %>%
  mutate(year = num(year))

# Attach bipart_rep to bills and votes by state and year
bill <- bill %>% left_join(br, by = c('state', 'year'))
votes <- votes %>% left_join(br, by = c('state', 'year'))


### Adding maj_diff to votes

# For each vote, find the first bill row with the same bill_id and copy its maj_diff
# (NA where the bill_id has no match in bill)
votes[['maj_diff']] <- bill[['maj_diff']][match(votes[['bill_id']], bill[['bill_id']])]



### Add in legislative professionalism

# Load
prof <- read.dta13("data/legprof-components.v1.2draft.dta")

# Filter to the date range of our bills and votes data (2010:2014). 2015 is kept
# as well because it is the partner year of 2014 when terms are doubled up below.
prof <- filter(prof, year %in% 2010:2015)

# Combine into two-year terms (2010-11, 2012-13, 2014-15): append a copy of every
# row relabelled with the other year of its term (even year -> year + 1,
# odd year -> year - 1), so a term's values are available under both of its years.
# The earlier nested ifelse() tested year==2014 twice, which left the last branch
# unreachable: 2015 rows were never copied onto 2014 and were duplicated as 2015.
# The swap is done in one vectorised step; growing prof with rbind() row by row
# over 1:nrow(prof) was quadratic and appended NA rows when prof was empty.
prof <- rbind(
  prof,
  mutate(prof, year = ifelse(year %% 2 == 0, year + 1, year - 1))
)

# Add state-year variable (same '<state abbreviation>_<year>' key used by bill/votes joins below)
prof <- prof %>%
  mutate(state_year = paste(stateabv, year, sep='_'))

# Select relevant variables: the join key plus mds1 and mds2
prof <- dplyr::select(prof, c(state_year, mds1, mds2))

# Join to bill and votes
# NOTE(review): if the source file already has a row for both years of a term,
# state_year is not unique in prof and these joins will duplicate bill/vote rows -- confirm.
votes <- left_join(votes, prof, by='state_year')
bill <- left_join(bill, prof, by='state_year')


### Add indicator for whether the majority has enough votes to override a veto

# Load the veto-override data and keep only the state-years that occur in bill
override <- assignLoad("data/vetoOverrideVar.RData")
override <- filter(override, state_year %in% bill$state_year)

# Keep the join key and the two override indicators (renamed to house / senate),
# then collapse repeated rows (data was at the chamber level even though the variables aren't)
override <- override %>%
  dplyr::select(state_year,
                gov_pty_override_house = gov_party_holds_veto_override,
                gov_pty_override_sen = gov_party_holds_veto_override_sen) %>%
  distinct()

# For bills: join by state_year, pick the indicator that matches the row's chamber
# ('H' -> house, 'S' -> senate, anything else -> NA), then drop the two source columns
bill <- bill %>%
  left_join(override, by = 'state_year') %>%
  mutate(gov_pty_override = ifelse(chamber == 'H', gov_pty_override_house,
                                   ifelse(chamber == 'S', gov_pty_override_sen, NA))) %>%
  dplyr::select(-c(gov_pty_override_house, gov_pty_override_sen))

# Same steps for votes
votes <- votes %>%
  left_join(override, by = 'state_year') %>%
  mutate(gov_pty_override = ifelse(chamber == 'H', gov_pty_override_house,
                                   ifelse(chamber == 'S', gov_pty_override_sen, NA))) %>%
  dplyr::select(-c(gov_pty_override_house, gov_pty_override_sen))


### Add SD of Shor/McCarty scores as a measure of party unity

# Load the long-format Shor/McCarty scores
smi <- assignLoad("data/processedLong_shorMccarty.RData")

# Prepare the scores:
#   - year pushed to numeric, then restricted to the years present in bill
#   - state_year key built from st and year
#   - chamber derived from the legiscan_district prefix ('HD' -> 'H', 'SD' -> 'S', otherwise NA)
smi <- smi %>%
  mutate(year = num(year)) %>%
  filter(year %in% unique(bill$year)) %>%
  mutate(
    state_year = paste(st, year, sep = '_'),
    chamber = ifelse(str_detect(legiscan_district, "^HD"), 'H',
                     ifelse(str_detect(legiscan_district, "^SD"), 'S', NA))
  )

# Standard deviation of np_score for each D/R party within each state_year and chamber,
# reshaped to one row per state_year-chamber with columns sd_unity_D and sd_unity_R
sm_sd <- smi %>%
  filter(party %in% c("D", "R")) %>%
  group_by(state_year, chamber, party) %>%
  dplyr::summarize(
    sd_unity = sd(np_score, na.rm = TRUE)
  ) %>%
  ungroup() %>%
  pivot_wider(names_from = party, values_from = sd_unity) %>%
  dplyr::rename(sd_unity_D = D, sd_unity_R = R)

# For bills: join by state_year and chamber, keep the SD of whichever party is the
# majority (maj_pty 'D' or 'R', otherwise NA) as maj_unity, then drop the per-party columns
bill <- bill %>%
  left_join(sm_sd, by = c('state_year', 'chamber')) %>%
  mutate(maj_unity = ifelse(maj_pty == 'D', sd_unity_D,
                            ifelse(maj_pty == 'R', sd_unity_R, NA))) %>%
  dplyr::select(-c(sd_unity_D, sd_unity_R))

# Same steps for votes
votes <- votes %>%
  left_join(sm_sd, by = c('state_year', 'chamber')) %>%
  mutate(maj_unity = ifelse(maj_pty == 'D', sd_unity_D,
                            ifelse(maj_pty == 'R', sd_unity_R, NA))) %>%
  dplyr::select(-c(sd_unity_D, sd_unity_R))


### Add non-trifecta divided government variable

# Load party control data. divided is 1 when legi_control is one of
# "Divided", "Divided*", "Split", "Split*" and 0 otherwise; full state names
# are converted to postal abbreviations (NA for names not in state.name).
pty <- assignLoad("data/partyControl.RData")
pty <- pty %>%
  mutate(divided = ifelse(legi_control %in% c("Divided", "Divided*", "Split", "Split*"), 1, 0),
         state = state.abb[match(state, state.name)])

# Replace the existing divided column in bill with the one from pty (joined by
# state and year), recoded as a 'Divided'/'Unified' factor with 'Unified' as reference
bill <- bill %>%
  dplyr::select(-divided) %>%
  left_join(dplyr::select(pty, c(state, year, divided)),
            by = c('state', 'year')) %>%
  mutate(divided = relevel(as.factor(ifelse(divided == 1, 'Divided', 'Unified')), ref = 'Unified'))

# Same replacement for votes
votes <- votes %>%
  dplyr::select(-divided) %>%
  left_join(dplyr::select(pty, c(state, year, divided)),
            by = c('state', 'year')) %>%
  mutate(divided = relevel(as.factor(ifelse(divided == 1, 'Divided', 'Unified')), ref = 'Unified'))
